In [41]:
import pandas as pd
import plotly.io as pio
import sys
sys.path.append('/Users/kevinanderson/Projects/az_takehome/problem_2')
import utilities
import simulation
import importlib
In [42]:
importlib.reload(simulation)
help(simulation.Simulation)
Help on class Simulation in module simulation: class Simulation(builtins.object) | Simulation( | sim_name: str, | n: int = 500, | n_visits: int = 10, | event_rate: float = 0.05, | event_rate_change_rate: float = 0.01, | cv_mean: float = 100, | cv_sd: float = 20, | cv_noise_sd: float = 3, | cv_change_over_time: float = -2, | patient_id_start_idx: int = 1, | relationship_coefficient: float = 0.1, | seed: int = 2948 | ) | | Parameters | ---------- | n : int | Desired number of patients. | | n_visits: int | Desired number of visits across the study. | | event_rate: float | range: (0 - 1) | Baseline event rate PER VISIT. | For instance, if the event rate is 0.10, then 10% of patients will experience an event at each visit. | | event_rate_change_rate: float | range: (0-1) | The rate at which the event rate changes over time. | For instance, if the event rate is 0.10 and the event_rate_change_rate is 0.01, then the event rate will increase by 1% each visit. | | cv_mean: float | The mean of the continuous variable. | | cv_sd: float | The standard deviation of the continuous variable. | | cv_change_over_time: float | range: (-inf, inf) | unit: CV standard deviations | Set the rate of change of the continuous variable over time. | The code will take this value and divide it by the number of visits to get the change at each visit. | For instance, if cv_change_over_time is -2, then the continuous variable will decrease by 2 standard deviations over the course of the study. | | cv_noise_sd: float | The standard deviation of the noise added to the continuous variable at each visit. | If this is set to zero, then the CV for each patient will be a perfect linear trajectory. | | relationship_coefficient: float | The strength of the relationship between the continuous variable and the event rate. | | If this is set to zero, then the event rate is independent of the continuous variable. | If this is set to a positive value, then the event rate increases as the continuous variable increases. | If this is set to a negative value, then the event rate decreases as the continuous variable increases. | | This directly maps to the beta coefficient in a logistic regression model that modulates the | relationship between the continuous variable and the event rate. | | patient_id_start_idx: int | The starting index for patient IDs. | This is useful if you want to simulate multiple arms and want to ensure that the patient IDs are unique across arms. | | seed: int | The random seed for reproducibility. | | Methods defined here: | | __init__( | self, | sim_name: str, | n: int = 500, | n_visits: int = 10, | event_rate: float = 0.05, | event_rate_change_rate: float = 0.01, | cv_mean: float = 100, | cv_sd: float = 20, | cv_noise_sd: float = 3, | cv_change_over_time: float = -2, | patient_id_start_idx: int = 1, | relationship_coefficient: float = 0.1, | seed: int = 2948 | ) | Initialize self. See help(type(self)) for accurate signature. | | format_output( | self, | event_df=<class 'pandas.core.frame.DataFrame'>, | visit_cv_df=<class 'pandas.core.frame.DataFrame'>, | patient_ids=typing.List[int], | format='long' | ) | | plot_continous_variable_over_time( | self, | cv_plot_df: pandas.core.frame.DataFrame, | event_plot_df: pandas.core.frame.DataFrame | ) | | run_cox_timevary_cox_propotional_hazard( | self, | cv_data: pandas.core.frame.DataFrame, | death_data: pandas.core.frame.DataFrame | ) | | run_simulation( | self, | patient_ids: List[int], | arm_name: str, | arm_n: int, | event_rate: float, | event_rate_change_rate: float, | cv_mean: float, | cv_sd: float, | cv_std_change_over_time: float, | cv_noise_sd: float, | relationship_coefficient: float | ) | | simulate(self) | | ---------------------------------------------------------------------- | Data descriptors defined here: | | __dict__ | dictionary for instance variables | | __weakref__ | list of weak references to the object
Vignette 1: Demonstrate how to modulate chages in the continuous variable over time¶
For this simulation, we set the baseline event rate to zero to isolate how modifying the cv_change_over_time changes the trajectory of the continuous variable across study visits
In [46]:
# Low Dependency Simulation
importlib.reload(simulation)
pio.renderers.default = 'notebook'
# Instantiate simulation object and parameters
n = 500
change_sim_obj = simulation.Simulation(sim_name='change_over_time',
n = n,
n_visits = 10,
event_rate = 0,
event_rate_change_rate = 0,
cv_mean = 100,
cv_sd = 10,
cv_noise_sd = 3,
cv_change_over_time = -2, # CHANGE 2 STDEVs over course of study
patient_id_start_idx = 1,
relationship_coefficient = .1,
)
# No change over time
nochange_sim_obj = simulation.Simulation(sim_name='nochange_over_time',
n = n,
n_visits = 10,
event_rate = 0,
event_rate_change_rate = 0,
cv_mean = 100,
cv_sd = 10,
cv_noise_sd = 3,
cv_change_over_time = 0, # CHANGE 0 STDEVs over course of study
relationship_coefficient = .1,
patient_id_start_idx = n+1
)
# run the simulation
cv_data_change, death_data_change = change_sim_obj.simulate()
cv_data_nochange, death_data_nochange = nochange_sim_obj.simulate()
# concetaenate the dataframes for plotting
cv_plot_df = pd.concat([cv_data_change, cv_data_nochange])
death_plot_df = pd.concat([death_data_change, death_data_nochange])
# make the figure
fig = change_sim_obj.plot_continous_variable_over_time(cv_plot_df, death_plot_df)
fig.show()
/Users/kevinanderson/Projects/az_takehome/problem_2/simulation.py:271: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
Vignette 2: Demonstrate how to modulate risk of death over time¶
For this simulation, the baseline event rate is set to 1% for both runs. However, the Event Rate Increase simulation models a 2% increase in the event rate over time
In [55]:
# Low Dependency Simulation
pio.renderers.default = 'notebook'
importlib.reload(simulation)
# Instantiate simulation object and parameters
n = 500
change_sim_obj = simulation.Simulation(sim_name='Event Rate Increase',
n = n,
n_visits = 10,
event_rate = 0.01, # 1% event rate
event_rate_change_rate = 0.02, # 2% increase in event rate over time
cv_mean = 100,
cv_sd = 10,
cv_noise_sd = 0,
cv_change_over_time = -1,
patient_id_start_idx = 1,
relationship_coefficient = 0,
)
# No change over time
nochange_sim_obj = simulation.Simulation(sim_name='Event Rate Constant',
n = n,
n_visits = 10,
event_rate = 0.01, # 1% event rate
event_rate_change_rate = 0, # NO CHANGE in event rate over time
cv_mean = 100,
cv_sd = 10,
cv_noise_sd = 0,
cv_change_over_time = -1,
relationship_coefficient = 0,
patient_id_start_idx = n+1
)
# run the simulation
cv_data_change, death_data_change = change_sim_obj.simulate()
cv_data_nochange, death_data_nochange = nochange_sim_obj.simulate()
# concetaenate the dataframes for plotting
cv_plot_df = pd.concat([cv_data_change, cv_data_nochange])
death_plot_df = pd.concat([death_data_change, death_data_nochange])
# make the figure
fig = change_sim_obj.plot_continous_variable_over_time(cv_plot_df, death_plot_df)
# change figure dimensions
fig.update_layout(
height=1000,
)
pio.renderers.default = 'notebook'
fig
/Users/kevinanderson/Projects/az_takehome/problem_2/simulation.py:271: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
Problem 2: Model High vs Low dependence between the continuous variable and the event (i.e. death)
- Relationship coefficient is essentially a beta within a logistic regression function.
- For exampke, if the relationship_coefficient=0.10, then a 1 unit increase in the CV would lead to a 0.10 increase in the log-odds of the event.
- I also display the results of a time-varying cox proportinoal hazards model to verify that
High Dependencescenario produces stronger dependence between the CV and the binary event.
In [58]:
# Low Dependency Simulation
pio.renderers.default = 'notebook'
importlib.reload(simulation)
# Instantiate simulation object and parameters
n = 500
high_sim_obj = simulation.Simulation(sim_name='High Dependence',
n = n,
n_visits = 10,
event_rate = 0.02,
event_rate_change_rate = 0.001,
cv_mean = 100,
cv_sd = 10,
cv_noise_sd = 1,
cv_change_over_time = -2,
patient_id_start_idx = 1,
relationship_coefficient = 0.10,
seed=4444,
)
# No change over time
nochange_sim_obj = simulation.Simulation(sim_name='No Dependence',
n = n,
n_visits = 10,
event_rate = 0.02,
event_rate_change_rate = 0.001,
cv_mean = 100,
cv_sd = 10,
cv_noise_sd = 1,
cv_change_over_time = -2,
relationship_coefficient = 0.01,
patient_id_start_idx = n+1,
seed=5555,
)
# run the simulation
cv_data_high, death_data_high = high_sim_obj.simulate()
cv_data_low, death_data_low = nochange_sim_obj.simulate()
# concetaenate the dataframes for plotting
cv_plot_df = pd.concat([cv_data_high, cv_data_low])
death_plot_df = pd.concat([death_data_high, death_data_low])
# make the figure
fig = change_sim_obj.plot_continous_variable_over_time(cv_plot_df, death_plot_df)
cox_res_high = high_sim_obj.run_cox_timevary_cox_propotional_hazard(cv_data_high, death_data_high)
cox_res_high = pd.DataFrame(cox_res_high.summary)
cox_res_high.insert(0, 'sim_name', 'High Dependence')
cox_res_low = high_sim_obj.run_cox_timevary_cox_propotional_hazard(cv_data_low, death_data_low)
cox_res_low = pd.DataFrame(cox_res_low.summary)
cox_res_low.insert(0, 'sim_name', 'Low Dependence')
res_df = pd.concat([cox_res_high, cox_res_low])
display(pd.DataFrame(res_df))
# change figure dimensions
fig.update_layout(
height=1000,
)
pio.renderers.default = 'notebook'
fig
/Users/kevinanderson/Projects/az_takehome/problem_2/simulation.py:271: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
| sim_name | coef | exp(coef) | se(coef) | coef lower 95% | coef upper 95% | exp(coef) lower 95% | exp(coef) upper 95% | cmp to | z | p | -log2(p) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| covariate | ||||||||||||
| continuous_measure | High Dependence | -0.094442 | 0.909881 | 0.007031 | -0.108223 | -0.080661 | 0.897428 | 0.922506 | 0.0 | -13.431775 | 3.938584e-41 | 134.221375 |
| continuous_measure | Low Dependence | 0.003246 | 1.003251 | 0.009348 | -0.015076 | 0.021567 | 0.985037 | 1.021801 | 0.0 | 0.347206 | 7.284369e-01 | 0.457124 |
In [ ]: